// Documentation: https://github.com/hooke007/MPV_lazy/wiki/4_GLSL

// CuNNy fast SOFT
// Copyright (c) 2024 funnyplanter

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3.0 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program.  If not, see <https://www.gnu.org/licenses/>.
/* ------------------------------------------------------------------- */


//!DESC [CuNNy_fast_SOFT] -in
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND LUMA
//!SAVE in
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Request 16-bit float arithmetic types. When the extension macro is defined
// the pass uses half-precision vector/matrix/scalar types; otherwise it falls
// back to the standard 32-bit types. V4, M4 and F are the aliases used below
// (M4 is defined but not used in this pass).
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0(x, y): red channel of the LUMA texel at pos + (x, y), scaled by LUMA_mul
// and converted to F. The coordinate is clamped to [0, sz], so reads beyond
// the image edge repeat the border texel. The macro expands in terms of `pos`
// and `sz`, which must be in scope where it is used. The trailing
// `* ivec2(1, 1) + ivec2(0, 0)` leaves the coordinate unchanged.
#define l0(x, y) F((LUMA_mul * texelFetch(LUMA_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(1, 1) + ivec2(0, 0), 0)).r)
// Work-group shared tile of input samples (one plane of 10x10 scalars).
// hook() fills it with the samples at offsets -1..8 from the work group's
// origin, i.e. an 8x8 block plus a one-texel border on every side.
shared F G[1][10][10];
// Entry point of the "-in" pass.
// Reads a 3x3 neighbourhood of LUMA around each source texel, combines the nine
// samples with constant 4-component vectors (presumably trained convolution
// weights; the constants are literal in this file) into three V4 accumulators,
// adds a constant offset to each, clamps to [0, 1] and stores the three results
// side by side in out_image. out_image is not declared in this chunk; it is
// presumably supplied by the host for compute hooks.
void hook() {
	// xy:   invocation index inside the work group.
	// pos:  source texel handled by this invocation (work groups advance by 8).
	// opos: output coordinate; three output texels per source texel along x,
	//       matching the "WIDTH LUMA.w 3 *" directive of this pass.
	// sz:   largest valid LUMA coordinate, used by l0() for clamping.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative fill of the 10x10 tile: each invocation writes the entries at
	// (xy + {0, 8}) in each axis that fall inside the tile, so it stores up to
	// four samples. Tile entry [ay][ax] holds the sample at pos + (x-1, y-1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
		}
	}
	// Wait until every invocation of the work group has reached this point
	// before the tile is read back.
	barrier();
	F s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	// s0_<row>_<col>: 3x3 window from the tile; s0_1_1 is the sample at pos
	// itself because the tile is stored with a (-1, -1) offset.
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2];
	// Accumulate: for each of the nine samples, add (constant vector * sample)
	// to each of the three accumulators (12 output components in total).
	r0 += V4(3.010e-02, 1.564e-02, -1.653e-02, -3.702e-03) * s0_0_0;
	r1 += V4(-1.406e-02, 1.070e-02, -3.109e-02, 2.731e-02) * s0_0_0;
	r2 += V4(-5.163e-02, 4.491e-02, 7.853e-01, 2.529e-01) * s0_0_0;
	r0 += V4(2.803e-02, -1.895e-02, 1.012e+00, 2.149e-02) * s0_0_1;
	r1 += V4(1.870e-01, -2.768e-02, -2.353e-01, 2.040e-01) * s0_0_1;
	r2 += V4(-5.183e-01, -3.464e-02, -7.988e-01, -4.419e-02) * s0_0_1;
	r0 += V4(-2.810e-02, -2.463e-03, 1.985e-02, -3.392e-02) * s0_0_2;
	r1 += V4(6.055e-01, 1.673e-02, -1.892e-01, 4.952e-02) * s0_0_2;
	r2 += V4(-1.032e-01, -7.236e-02, 3.918e-03, -2.431e-02) * s0_0_2;
	r0 += V4(3.346e-01, 8.850e-01, 2.140e-03, -9.192e-03) * s0_1_0;
	r1 += V4(-5.193e-02, -2.364e-02, 4.263e-02, 7.263e-02) * s0_1_0;
	r2 += V4(-4.015e-03, -5.815e-03, -7.910e-01, -5.395e-01) * s0_1_0;
	r0 += V4(-8.111e-01, -8.756e-01, -9.627e-01, -6.735e-02) * s0_1_1;
	r1 += V4(-7.967e-01, -9.668e-01, 3.975e-01, -8.028e-01) * s0_1_1;
	r2 += V4(7.652e-01, 9.664e-01, 7.637e-01, -1.460e-01) * s0_1_1;
	r0 += V4(7.927e-02, -5.525e-03, -4.855e-02, -9.200e-01) * s0_1_2;
	r1 += V4(2.806e-02, 9.875e-01, 8.642e-03, 2.861e-01) * s0_1_2;
	r2 += V4(-5.948e-02, -4.044e-01, 2.062e-02, 9.010e-02) * s0_1_2;
	r0 += V4(-1.899e-02, 2.585e-02, 1.148e-02, 1.157e-02) * s0_2_0;
	r1 += V4(7.441e-02, 6.822e-03, -3.548e-02, -4.318e-02) * s0_2_0;
	r2 += V4(1.059e-02, -6.225e-02, -6.352e-04, 1.452e-01) * s0_2_0;
	r0 += V4(2.635e-01, -1.487e-02, -4.457e-02, 5.068e-02) * s0_2_1;
	r1 += V4(-7.955e-03, 1.923e-02, 1.438e-02, -1.157e-02) * s0_2_1;
	r2 += V4(-1.213e-01, -4.688e-01, 4.193e-02, 3.039e-01) * s0_2_1;
	r0 += V4(2.899e-02, -1.022e-02, 2.218e-02, 9.512e-01) * s0_2_2;
	r1 += V4(-2.496e-02, -2.301e-02, -5.402e-03, 8.511e-02) * s0_2_2;
	r2 += V4(8.476e-02, 3.721e-02, -2.634e-02, -4.782e-02) * s0_2_2;
	// Per accumulator: add a constant offset, clamp to [0, 1], and store at
	// output column 3 * pos.x + k (k = 0, 1, 2).
	r0 += V4(-1.865e-03, 9.461e-04, -2.628e-04, 2.123e-04);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-8.868e-03, 5.735e-04, -8.273e-03, -1.885e-03);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(1.006e-02, 3.972e-03, -2.517e-03, -8.864e-03);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_fast_SOFT] -conv1
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND in
//!BIND LUMA
//!SAVE conv1
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Request 16-bit float arithmetic types. When the extension macro is defined
// the pass uses half-precision vector/matrix/scalar types; otherwise it falls
// back to the standard 32-bit types (F is defined but not used in this pass).
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): the three 4-component texels of the `in` texture that belong
// to the source pixel at pos + (x, y). The pixel coordinate is clamped to
// [0, sz] first and then mapped to texture column 3 * px + k (k = 0, 1, 2),
// which is the layout the pass that saves `in` writes. Each value is scaled
// by in_mul and converted to V4. The macros expand in terms of `pos` and `sz`,
// which must be in scope where they are used.
#define l0(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Work-group shared tile: three planes (one per texel of the triple) of 10x10
// V4 samples, covering offsets -1..8 from the work group's origin.
shared V4 G[3][10][10];
// Entry point of the "-conv1" pass.
// For each source pixel it reads a 3x3 neighbourhood from each of the three
// planes of the `in` texture, multiplies every sample by constant 4x4 matrices
// (presumably trained convolution weights; the constants are literal in this
// file), sums the products into three V4 accumulators, clamps them to [0, 1]
// and stores them side by side in out_image. out_image is not declared in this
// chunk; it is presumably supplied by the host for compute hooks.
void hook() {
	// xy:   invocation index inside the work group.
	// pos:  source pixel handled by this invocation (work groups advance by 8).
	// opos: output coordinate; three output texels per source pixel along x.
	// sz:   largest valid pixel coordinate (LUMA-sized grid), used for clamping.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative fill of the 10x10 tile in all three planes: each invocation
	// writes the entries at (xy + {0, 8}) in each axis that fall inside the
	// tile. Tile entry [ay][ax] holds the sample at pos + (x-1, y-1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Wait until every invocation of the work group has reached this point
	// before the tile is read back.
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	// s0_<row>_<col>: 3x3 window from plane 0; s1_<row>_<col>: same window from
	// plane 1. Index [1][1] of the window is the sample at pos itself.
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 taps: each sample contributes (constant 4x4 matrix * sample) to
	// each of the three accumulators.
	r0 += M4(7.794e-02, -2.650e-01, -1.549e-01, 2.235e-01, 7.669e-02, 4.486e-02, 4.939e-02, -1.332e-01, 2.910e-03, -1.628e-02, -1.826e-02, -1.620e-01, -2.013e-01, -1.000e+00, -2.099e-01, -5.049e-01) * s0_0_0;
	r1 += M4(1.870e-01, -8.092e-02, -1.095e-01, 1.351e-01, -8.602e-02, 1.433e-02, 9.788e-02, -6.407e-02, -6.640e-02, 9.924e-02, -2.087e-01, 2.395e-02, -1.832e-01, -1.470e-01, -1.000e+00, 5.934e-02) * s0_0_0;
	r2 += M4(-2.966e-02, -1.905e-01, 3.156e-01, -1.305e-02, 3.086e-02, 4.033e-02, -8.145e-02, -2.168e-02, -2.380e-02, 1.908e-02, -5.397e-02, -1.005e-02, -4.435e-02, -1.000e+00, 4.751e-02, 1.611e-02) * s0_0_0;
	r0 += M4(-5.723e-02, -1.000e+00, -6.325e-02, -2.110e-01, -1.413e-01, -1.978e-01, -2.987e-02, -2.322e-01, -5.082e-02, -1.081e-01, 9.235e-02, -1.934e-01, -1.254e-01, -7.259e-02, 1.185e-01, -4.754e-01) * s0_0_1;
	r1 += M4(5.419e-01, -1.466e-01, -1.000e+00, -9.857e-02, 2.484e-01, 4.357e-02, -4.103e-01, 6.397e-02, 1.961e-01, -4.900e-02, -3.350e-01, -8.448e-02, -1.443e-01, -1.505e-02, -3.725e-01, 7.570e-04) * s0_0_1;
	r2 += M4(-2.347e-02, 1.172e-01, 1.226e-01, 2.809e-02, 6.467e-02, 3.290e-01, -2.548e-01, -1.135e-01, -4.749e-02, -1.289e-02, 1.840e-01, 1.915e-02, -7.721e-02, 4.806e-01, 1.482e-02, 3.444e-02) * s0_0_1;
	r0 += M4(-2.896e-01, -1.372e-01, -3.299e-02, 4.125e-02, 1.117e-02, 3.082e-02, -8.388e-02, -5.015e-01, 6.857e-02, 3.244e-02, -2.025e-02, -2.583e-01, 3.951e-02, -2.468e-02, 2.363e-03, -4.690e-02) * s0_0_2;
	r1 += M4(6.835e-02, -3.586e-01, 2.322e-01, 1.848e-02, -7.973e-02, 2.217e-01, -9.748e-02, 1.279e-02, -6.600e-03, 1.093e-02, -4.379e-01, -1.079e-01, 9.725e-02, -3.054e-02, -3.488e-04, -1.294e-02) * s0_0_2;
	r2 += M4(-1.127e-01, -6.169e-01, 3.780e-02, -9.353e-02, -7.181e-02, 2.255e-01, -2.322e-01, -9.600e-02, 4.511e-03, 7.173e-02, 1.341e-01, -1.281e-02, -3.368e-02, -4.067e-02, 2.468e-03, 2.087e-02) * s0_0_2;
	r0 += M4(-5.643e-02, 3.250e-02, 1.016e-01, 7.288e-01, -9.735e-02, 2.792e-02, -2.702e-02, -2.163e-01, 4.411e-02, 1.211e-01, -1.322e-01, -5.485e-01, -1.921e-01, -6.111e-02, 2.615e-01, 5.271e-01) * s0_1_0;
	r1 += M4(-4.878e-01, 2.863e-01, -1.013e-01, 7.005e-03, 3.290e-01, -9.448e-02, -1.840e-02, -4.801e-02, 6.617e-03, 1.645e-01, -9.413e-02, -3.232e-01, -1.000e+00, -4.144e-02, -2.291e-02, 2.766e-01) * s0_1_0;
	r2 += M4(-1.335e-02, -4.363e-01, -3.415e-01, 9.597e-02, 3.896e-03, 2.475e-01, 8.181e-02, -5.991e-02, -8.786e-02, 3.081e-01, -8.168e-02, -4.246e-02, 2.369e-02, -3.866e-01, 6.519e-01, 2.174e-01) * s0_1_0;
	r0 += M4(2.759e-01, -3.532e-01, -2.679e-01, -4.176e-01, 8.734e-01, 4.217e-02, 3.986e-01, 4.921e-01, -1.416e-01, 2.934e-01, 4.365e-01, 5.038e-01, 1.641e-02, -5.426e-02, 7.935e-02, -3.819e-01) * s0_1_1;
	r1 += M4(-1.000e+00, 6.343e-02, -4.551e-01, -2.240e-02, -1.000e+00, -5.699e-02, -5.626e-02, 6.348e-01, -1.000e+00, -2.454e-01, 4.503e-01, -1.296e-01, -5.884e-05, 3.259e-01, -6.144e-03, -2.781e-01) * s0_1_1;
	r2 += M4(-7.130e-02, 2.897e-01, -2.568e-01, -1.258e-01, 5.090e-02, -8.427e-02, -1.361e-01, 2.040e-02, 7.606e-02, -2.500e-01, 6.557e-02, -1.750e-02, -2.710e-01, 3.186e-01, -3.387e-01, 4.085e-01) * s0_1_1;
	r0 += M4(-3.369e-01, 1.020e-01, -1.304e-01, -2.917e-01, -1.922e-01, -2.647e-01, 3.197e-03, -4.445e-01, -2.246e-02, 1.923e-01, 4.869e-02, -2.239e-01, 5.587e-02, -4.008e-02, 9.630e-02, -8.447e-03) * s0_1_2;
	r1 += M4(-1.614e-01, -1.026e-01, -7.236e-02, -4.029e-02, 2.802e-01, -7.778e-01, -3.029e-01, 5.211e-02, 1.681e-01, 1.921e-01, 6.921e-02, -1.362e-01, 8.160e-02, -6.139e-02, 9.713e-03, 1.767e-02) * s0_1_2;
	r2 += M4(-1.000e+00, -3.309e-01, -9.687e-02, -4.395e-02, -1.000e+00, -1.000e+00, -1.531e-01, 1.508e-01, -2.930e-01, 1.499e-01, -7.645e-02, 6.298e-02, -2.122e-02, 3.471e-02, 1.490e-02, -1.205e-02) * s0_1_2;
	r0 += M4(2.816e-02, 2.255e-01, 9.190e-02, 3.987e-01, 1.684e-02, -6.939e-02, -5.307e-02, -2.103e-01, -1.296e-01, -1.172e-01, -6.857e-02, -3.202e-01, -3.813e-02, -1.537e-01, -8.225e-02, -3.661e-01) * s0_2_0;
	r1 += M4(-2.587e-01, 1.468e-01, 1.114e-01, 3.088e-01, -8.320e-03, -6.118e-02, -3.081e-02, -2.811e-01, -2.330e-02, 5.925e-02, -1.882e-02, -4.386e-01, 4.153e-02, -8.243e-02, -3.880e-02, 4.354e-02) * s0_2_0;
	r2 += M4(3.889e-02, -1.199e-01, -3.082e-01, -8.819e-02, 8.432e-03, 2.232e-01, 1.635e-01, 2.125e-02, -9.647e-02, 4.693e-01, 4.177e-01, -1.811e-01, 3.597e-02, -3.108e-02, -6.817e-01, -4.802e-01) * s0_2_0;
	r0 += M4(1.486e-01, 5.827e-01, 8.288e-02, 9.590e-01, 1.020e-01, -1.975e-01, 2.860e-02, -2.411e-01, 6.244e-02, -3.047e-01, -9.848e-02, -8.886e-01, 4.886e-02, -1.240e-01, 2.221e-02, -2.295e-01) * s0_2_1;
	r1 += M4(-1.738e-01, 2.716e-01, 9.524e-02, -1.261e-02, -2.599e-04, -1.584e-01, -6.298e-02, -8.716e-02, -8.484e-02, -4.298e-01, 1.265e-02, -2.365e-01, 5.880e-03, -1.488e-01, 7.315e-02, -4.863e-02) * s0_2_1;
	r2 += M4(-1.233e-01, -6.686e-02, 2.817e-01, 3.803e-01, 6.784e-03, -3.582e-01, 3.673e-01, -3.284e-01, 5.001e-02, -1.000e+00, -4.741e-01, -1.000e+00, -2.000e-02, 4.410e-02, -4.657e-01, -1.294e-01) * s0_2_1;
	r0 += M4(-1.548e-02, 1.146e-01, -7.471e-02, 4.539e-01, -2.207e-01, 3.016e-02, 1.623e-01, -1.140e-01, -6.168e-02, -1.459e-01, 1.584e-01, -1.762e-01, -7.614e-03, -1.638e-02, 4.285e-02, -1.234e-02) * s0_2_2;
	r1 += M4(5.935e-02, -2.026e-01, -8.815e-02, 1.051e-01, 1.003e-01, 4.000e-02, 1.332e-01, -6.208e-02, 3.193e-01, -1.394e-01, -2.448e-02, 1.232e-02, 5.592e-02, 9.527e-03, 4.696e-02, 1.487e-02) * s0_2_2;
	r2 += M4(-2.509e-01, -3.267e-02, 4.741e-01, -1.662e-01, -6.739e-02, 5.618e-02, -1.042e-01, -7.963e-02, -1.587e-01, 1.763e-01, -1.489e-01, -8.797e-02, -1.169e-02, 5.443e-02, -1.146e-01, 1.499e-03) * s0_2_2;
	// Plane 1 taps.
	r0 += M4(7.688e-02, 9.516e-02, -1.027e-02, 2.215e-02, 1.399e-01, 3.494e-02, -3.889e-02, -2.769e-01, -8.470e-02, -3.190e-02, 4.166e-03, 4.647e-01, -1.064e-01, -1.999e-01, 4.904e-02, 2.110e-01) * s1_0_0;
	r1 += M4(1.588e-02, 2.490e-02, 2.023e-01, 1.513e-01, -2.925e-01, -7.821e-02, -1.862e-01, -9.842e-02, 1.206e-01, -3.817e-02, -7.895e-02, 1.201e-01, 1.250e-01, -7.403e-02, 8.202e-02, -1.442e-01) * s1_0_0;
	r2 += M4(-5.816e-03, 2.479e-01, 3.051e-02, 3.875e-02, 3.769e-02, -1.543e-01, 6.082e-02, -3.929e-02, -9.569e-02, -4.781e-01, -2.314e-01, -7.930e-02, -2.405e-02, -4.251e-01, 2.268e-02, -6.396e-02) * s1_0_0;
	r0 += M4(-1.252e-01, -1.049e-01, 2.199e-03, 1.311e-01, -2.002e-02, -1.309e-01, 1.418e-01, -8.911e-02, 2.280e-01, -3.386e-02, -8.528e-02, -4.718e-01, -2.595e-03, -1.907e-01, -2.797e-02, 5.714e-01) * s1_0_1;
	r1 += M4(5.838e-02, -9.581e-02, 4.960e-01, 1.304e-01, -1.029e-01, 2.338e-02, -8.535e-01, -1.127e-01, 2.746e-01, 2.180e-01, 1.304e-01, -1.232e-02, -1.800e-01, 3.335e-01, 1.533e-01, -7.905e-02) * s1_0_1;
	r2 += M4(6.461e-02, 2.044e-02, -6.480e-02, -4.174e-02, 6.320e-02, 4.828e-01, 3.923e-01, -2.461e-02, 7.662e-02, 5.093e-01, 5.175e-01, 2.984e-01, -6.160e-02, -5.479e-01, -2.343e-01, 6.699e-02) * s1_0_1;
	r0 += M4(-2.388e-02, 8.916e-02, 1.522e-03, 1.895e-03, -5.185e-02, -1.035e-01, 9.872e-02, -2.420e-01, -1.239e-01, -1.037e-01, -6.862e-02, -3.783e-02, 1.567e-01, 3.096e-02, 2.357e-02, 7.775e-01) * s1_0_2;
	r1 += M4(3.492e-02, -8.638e-02, 1.400e-01, 6.427e-02, 5.246e-02, 4.694e-02, -8.762e-02, -7.990e-02, 2.299e-01, -1.666e-01, -1.178e-01, -1.289e-01, -9.417e-02, 3.186e-01, 3.328e-02, 1.045e-01) * s1_0_2;
	r2 += M4(-2.588e-02, 1.219e-01, -6.279e-02, 1.814e-02, -2.829e-02, 6.143e-02, 9.758e-02, 4.406e-03, -1.012e-02, -5.722e-01, -2.666e-02, 1.433e-02, 9.256e-03, 5.003e-02, -1.797e-01, 1.215e-01) * s1_0_2;
	r0 += M4(1.776e-02, -2.368e-02, 2.270e-01, 5.699e-01, -4.125e-01, -3.075e-01, -4.287e-01, -6.721e-01, -4.658e-03, 4.908e-01, 5.832e-01, -7.732e-02, -2.631e-01, -2.482e-01, -8.383e-02, -4.200e-01) * s1_1_0;
	r1 += M4(7.899e-01, -2.685e-01, 1.280e-01, 4.984e-01, -4.272e-01, -3.162e-01, -3.241e-03, -6.646e-01, -3.800e-02, 1.732e-01, 4.746e-01, 1.856e-01, 1.901e-01, -1.238e-01, -1.094e-01, 4.469e-02) * s1_1_0;
	r2 += M4(-4.814e-02, 4.169e-01, 1.985e-01, 8.694e-02, -3.157e-02, -1.000e+00, 3.271e-01, 2.047e-02, 1.070e-01, 1.000e+00, -1.102e-01, 1.289e-01, 2.231e-02, -6.694e-02, 1.317e-01, -7.040e-02) * s1_1_0;
	r0 += M4(1.232e-01, 2.781e-02, 2.651e-02, 1.268e-01, 7.992e-02, -1.548e-02, 6.435e-02, 1.762e-01, -1.629e-02, -2.662e-01, -3.223e-01, -3.095e-02, -3.726e-01, 4.556e-01, 1.155e-01, -1.293e-01) * s1_1_1;
	r1 += M4(-5.421e-01, -1.345e-01, 3.040e-01, 3.157e-01, -1.000e+00, 6.079e-01, 4.113e-01, 3.919e-02, 1.897e-01, -1.201e-01, -3.623e-01, -2.388e-01, 1.616e-01, 4.504e-01, 9.763e-02, -3.666e-01) * s1_1_1;
	r2 += M4(1.280e-01, -8.318e-02, -1.369e-01, 8.416e-02, 3.971e-01, -7.646e-01, 3.814e-01, 1.258e-01, -5.352e-04, -4.926e-01, 1.171e-01, 3.409e-02, 7.039e-02, 4.141e-01, -3.326e-01, -4.492e-02) * s1_1_1;
	r0 += M4(-3.262e-02, 8.140e-02, -1.140e-01, -7.301e-02, -1.621e-01, -3.353e-03, 7.882e-02, -1.475e-01, 8.207e-02, -4.268e-02, 8.601e-02, -4.439e-02, 6.118e-01, -1.167e-01, -5.907e-02, 7.909e-01) * s1_1_2;
	r1 += M4(-8.515e-03, -1.827e-01, 1.200e-01, 6.747e-02, -2.320e-02, -1.592e-01, 2.047e-02, -5.468e-02, -2.820e-01, 2.000e-01, 3.725e-02, 5.894e-02, -1.841e-01, 4.434e-01, -1.548e-01, 1.265e-01) * s1_1_2;
	r2 += M4(-1.648e-01, -2.471e-01, -1.264e-01, -3.805e-02, -3.149e-01, 2.065e-01, -5.297e-01, 4.774e-02, 4.481e-02, 3.135e-01, -9.468e-02, -4.164e-02, -1.000e+00, -2.441e-01, 2.127e-01, -1.556e-01) * s1_1_2;
	r0 += M4(-1.204e-01, 2.390e-01, 1.034e-01, 6.489e-02, 2.897e-02, -4.962e-02, -1.287e-01, -4.373e-01, -3.471e-01, -2.549e-01, 2.974e-02, -5.389e-01, -3.275e-02, -2.590e-01, -5.705e-02, 2.789e-01) * s1_2_0;
	r1 += M4(-1.374e-01, -1.746e-01, 8.909e-02, 5.832e-01, 2.432e-02, 1.470e-01, -2.954e-02, -1.182e-01, 2.390e-01, -2.942e-01, -9.946e-02, -1.182e-01, 1.820e-01, -2.404e-01, -1.206e-01, 1.244e-01) * s1_2_0;
	r2 += M4(3.852e-02, -1.000e+00, 3.734e-02, 3.194e-01, 3.795e-02, 6.264e-01, -4.797e-01, -2.647e-01, -5.537e-02, 1.540e-01, -1.737e-01, -7.121e-02, -4.421e-02, -5.940e-01, 6.660e-02, -8.166e-02) * s1_2_0;
	r0 += M4(1.582e-01, 6.610e-03, -9.160e-02, 5.893e-01, -7.005e-03, 2.424e-02, -5.114e-02, -4.089e-01, -2.950e-01, -3.946e-02, 3.790e-01, 4.930e-01, -1.441e-03, -5.444e-01, 1.307e-01, -3.871e-01) * s1_2_1;
	r1 += M4(1.759e-01, 2.460e-01, 6.418e-02, 2.129e-01, 1.538e-01, 1.226e-01, -1.208e-01, 5.364e-02, -7.281e-01, 2.799e-01, 9.555e-02, 3.386e-02, -1.717e-01, -2.478e-01, -6.372e-02, -2.683e-01) * s1_2_1;
	r2 += M4(1.393e-01, 1.383e-01, -4.189e-02, -2.852e-01, 2.654e-01, 3.455e-01, -5.290e-01, -2.284e-01, 3.372e-01, 8.870e-01, -5.166e-01, 4.834e-01, 8.045e-02, 7.787e-03, 1.629e-01, 3.658e-01) * s1_2_1;
	r0 += M4(-3.006e-02, 1.122e-02, -5.261e-02, -1.647e-01, -6.514e-02, 8.895e-02, -1.198e-02, -1.881e-01, 1.227e-01, 5.517e-02, -5.960e-03, 1.165e-01, 2.912e-03, -1.727e-01, 4.507e-02, 1.365e-01) * s1_2_2;
	r1 += M4(6.031e-02, -4.396e-02, 1.339e-03, -3.644e-03, 1.467e-01, 3.681e-02, 2.230e-02, 5.198e-02, -1.202e-01, -2.036e-01, 3.998e-02, -3.479e-02, -1.886e-01, -7.359e-02, 5.039e-03, -1.459e-01) * s1_2_2;
	r2 += M4(-4.021e-02, -2.669e-02, -2.283e-01, 3.761e-03, -8.451e-02, -4.437e-02, -8.358e-01, -1.792e-02, -2.030e-01, -2.089e-01, 7.720e-01, -2.252e-03, -1.008e-01, 1.586e-01, 5.315e-01, 5.511e-02) * s1_2_2;
	// Plane 2: the s0_* variables are reused now that the plane 0 taps above
	// have been accumulated, so this reload must stay after them.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	// Plane 2 taps.
	r0 += M4(7.053e-02, 1.409e-01, -4.246e-03, -3.017e-01, 2.577e-02, 1.061e-01, 1.115e-01, 3.708e-01, 9.365e-03, -7.149e-03, -7.068e-03, 2.353e-02, -1.364e-02, 3.696e-02, -2.171e-02, -4.784e-03) * s0_0_0;
	r1 += M4(-5.606e-03, 2.073e-02, 2.682e-02, -1.945e-02, -4.979e-01, -7.399e-02, 1.276e-01, 1.062e-01, 3.333e-02, -1.567e-02, -2.176e-02, 4.674e-03, 1.081e-01, -1.311e-01, 3.931e-02, 7.224e-03) * s0_0_0;
	r2 += M4(6.330e-02, 2.761e-01, 2.222e-01, 2.382e-02, -4.580e-02, -8.604e-02, 3.178e-01, -5.682e-03, -1.274e-03, 1.071e-01, -6.001e-02, -5.010e-03, 6.373e-02, 1.803e-01, 2.323e-01, 2.898e-02) * s0_0_0;
	r0 += M4(-1.154e-01, -1.731e-01, 9.114e-02, 2.436e-01, 2.022e-02, 3.043e-01, 1.760e-01, -1.450e-01, 6.439e-02, 7.335e-02, -5.635e-03, -5.421e-02, -2.631e-01, 1.107e-01, 2.458e-02, 2.256e-01) * s0_0_1;
	r1 += M4(-4.292e-02, -6.663e-02, -1.175e-01, -1.122e-01, 1.584e-01, -9.733e-03, 7.523e-02, 1.886e-02, 1.105e-02, -4.439e-03, 1.119e-01, 8.468e-02, -2.330e-01, -3.836e-01, -4.962e-02, -2.992e-01) * s0_0_1;
	r2 += M4(-4.435e-02, 4.947e-02, -4.500e-01, -1.126e-01, 2.546e-02, -5.244e-01, -5.894e-02, -5.906e-02, 1.156e-02, 2.642e-01, 2.752e-02, 1.569e-02, 8.371e-02, -4.484e-01, -1.110e-01, -5.576e-03) * s0_0_1;
	r0 += M4(3.096e-02, 1.128e-01, -1.933e-02, 3.171e-02, -6.060e-02, 9.202e-03, 3.263e-02, -1.730e-01, -7.134e-02, -2.158e-02, 4.379e-02, 5.270e-02, 4.592e-01, 4.154e-01, -5.371e-01, 1.099e-01) * s0_0_2;
	r1 += M4(-3.303e-01, 4.161e-02, 8.691e-02, 1.362e-01, 1.477e-01, -3.561e-02, -2.386e-02, -1.123e-03, 1.536e-01, -7.402e-03, -1.216e-01, 1.409e-02, -5.769e-01, 5.046e-02, -1.786e-02, -1.456e-02) * s0_0_2;
	r2 += M4(2.519e-02, 9.238e-02, 6.653e-02, -5.017e-02, -1.547e-01, 1.644e-01, -7.598e-02, 1.244e-02, 1.411e-02, 2.373e-01, -6.111e-02, 4.473e-03, 2.607e-02, 4.630e-02, -2.528e-01, -1.252e-01) * s0_0_2;
	r0 += M4(-1.017e-01, 1.217e-01, -1.843e-01, -2.574e-02, 2.549e-01, -1.130e-01, 1.025e-01, 6.954e-02, -4.457e-02, 7.276e-02, -2.987e-02, -7.248e-02, -1.029e-01, 1.120e-02, 5.770e-02, 4.691e-02) * s0_1_0;
	r1 += M4(1.140e-01, -2.417e-02, 2.017e-01, 2.709e-01, -5.859e-02, 5.539e-02, -7.273e-02, -5.152e-02, 5.501e-02, -6.418e-02, 5.837e-02, 6.636e-02, -8.178e-02, 6.740e-02, -1.068e-01, -9.845e-02) * s0_1_0;
	r2 += M4(-6.019e-02, -7.209e-02, -1.957e-01, 3.129e-02, -4.429e-02, -3.714e-01, -1.181e-01, 2.465e-01, 1.613e-02, 2.919e-01, 2.137e-02, 3.025e-02, -7.048e-03, -4.068e-02, -9.487e-02, -6.291e-02) * s0_1_0;
	r0 += M4(1.083e-01, -4.678e-01, 6.242e-02, -6.415e-01, -2.319e-01, -1.040e-01, -2.863e-02, 1.074e-02, -1.192e-01, -1.056e-01, 1.587e-01, 4.403e-01, 4.502e-01, 1.607e-01, -2.201e-01, -5.491e-02) * s0_1_1;
	r1 += M4(1.393e-02, -8.547e-02, -6.157e-01, -2.434e-01, 2.149e-01, -1.777e-01, 1.052e-01, 4.420e-02, 1.847e-02, -4.546e-01, -1.136e-01, 2.168e-02, -3.537e-01, 2.415e-01, 2.472e-01, -2.271e-01) * s0_1_1;
	r2 += M4(-1.238e-01, -4.447e-01, -2.284e-01, -1.274e-01, -1.363e-01, 4.482e-01, -1.000e+00, -2.613e-01, 5.705e-02, -6.358e-01, 1.259e-01, 1.907e-01, 1.043e-04, -1.094e-01, 6.293e-02, -1.695e-01) * s0_1_1;
	r0 += M4(-5.511e-02, 8.629e-02, -8.897e-02, 3.851e-01, -1.036e-02, 1.846e-02, -3.848e-02, -1.814e-01, -4.296e-03, -2.845e-02, 8.209e-02, 4.456e-01, 7.131e-02, 2.705e-01, 1.503e-01, 7.930e-02) * s0_1_2;
	r1 += M4(-5.667e-02, -7.946e-02, 3.552e-02, -3.413e-02, -7.095e-02, 1.082e-01, 5.364e-03, -2.041e-03, 1.296e-01, -7.773e-02, -5.022e-02, 8.768e-02, 1.000e+00, 6.813e-02, -3.227e-01, 8.199e-02) * s0_1_2;
	r2 += M4(8.434e-02, 8.706e-02, 5.491e-01, 7.236e-04, 1.792e-01, -2.319e-02, -2.415e-01, 3.472e-02, 8.701e-02, -3.124e-01, 6.143e-02, 1.971e-01, 1.556e-01, 1.909e-02, 3.390e-01, 1.984e-01) * s0_1_2;
	r0 += M4(2.358e-02, 3.160e-02, -6.666e-02, 9.966e-02, -1.376e-02, -2.721e-02, 1.794e-02, -2.105e-02, 3.655e-02, -2.853e-02, -5.740e-02, -1.374e-02, 2.557e-02, -1.586e-01, 7.802e-03, -1.734e-02) * s0_2_0;
	r1 += M4(-2.202e-02, 2.606e-01, 2.447e-01, -1.056e-01, 7.672e-02, -5.410e-02, -7.035e-03, -1.585e-02, 2.232e-02, 3.732e-02, 6.476e-02, -1.332e-01, 1.093e-01, -3.819e-02, -2.670e-02, -1.308e-01) * s0_2_0;
	r2 += M4(-2.177e-02, 1.000e+00, -4.158e-01, -2.732e-02, 2.993e-02, -1.154e-01, 4.130e-02, -9.262e-02, -1.516e-02, 2.577e-01, 4.098e-02, 2.170e-03, -9.559e-03, -2.291e-01, 8.434e-02, 5.283e-02) * s0_2_0;
	r0 += M4(3.080e-02, 1.979e-01, -1.527e-01, -4.533e-01, 8.010e-03, -5.616e-03, -5.264e-02, -1.787e-02, 1.408e-01, -4.518e-02, 5.943e-02, 4.112e-01, -9.404e-03, -2.092e-01, 9.704e-02, -1.559e-01) * s0_2_1;
	r1 += M4(2.502e-01, 3.181e-01, 5.396e-02, -3.684e-02, -1.498e-01, 2.871e-02, -1.189e-02, 2.504e-02, -1.444e-01, -7.257e-02, 9.809e-02, 7.189e-02, -5.990e-02, 1.020e-01, 8.766e-02, -5.716e-01) * s0_2_1;
	r2 += M4(-1.272e-02, -7.772e-01, -5.698e-04, 4.644e-01, 1.256e-02, 6.175e-02, 1.808e-02, 2.918e-02, 1.204e-01, -6.947e-01, 6.576e-02, 4.112e-02, -8.897e-03, 7.950e-01, 4.633e-01, 5.406e-02) * s0_2_1;
	r0 += M4(-3.791e-02, 2.460e-02, -7.633e-02, 6.279e-01, 3.644e-03, -2.934e-02, 5.902e-02, -3.362e-02, -1.039e-01, -2.190e-01, 9.616e-02, 5.350e-01, -1.067e-01, 3.367e-01, -7.846e-02, 4.525e-01) * s0_2_2;
	r1 += M4(-1.158e-01, 5.937e-02, 1.570e-01, 9.755e-02, 7.924e-02, 1.257e-02, 5.116e-03, -1.091e-04, 8.452e-02, -3.783e-01, 2.286e-01, 3.639e-01, 7.670e-02, 2.785e-01, -2.221e-01, -9.525e-02) * s0_2_2;
	r2 += M4(2.345e-01, -2.777e-01, 2.448e-02, -1.116e-01, -4.471e-02, 7.723e-02, -7.624e-02, 1.306e-02, -3.564e-01, -3.483e-01, 5.363e-01, -4.497e-01, 1.110e-01, -2.825e-01, 2.608e-01, 2.170e-02) * s0_2_2;
	// Clamp each accumulator to [0, 1] and store it at output column
	// 3 * pos.x + k (k = 0, 1, 2). No separate constant offset is added here.
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_fast_SOFT] -conv2
//!HOOK LUMA
//!COMPUTE 16 8 8 8
//!BIND conv1
//!BIND LUMA
//!SAVE conv2
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Request 16-bit float arithmetic types. When the extension macro is defined
// the pass uses half-precision vector/matrix/scalar types; otherwise it falls
// back to the standard 32-bit types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): the three 4-component texels of the `conv1` texture that
// belong to the source pixel at pos + (x, y). The pixel coordinate is clamped
// to [0, sz] first and then mapped to texture column 3 * px + k (k = 0, 1, 2),
// which is the layout the pass that saves `conv1` writes. Each value is scaled
// by conv1_mul and converted to V4. The macros expand in terms of `pos` and
// `sz`, which must be in scope where they are used.
#define l0(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Work-group shared tile: three planes (one per texel of the triple) of 10x10
// V4 samples.
shared V4 G[3][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(2, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1;
	r0 = V4(0.0); r1 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(1.154e-02, 1.046e-02, 5.303e-03, 7.097e-02, -3.293e-02, -8.974e-02, -9.674e-03, 2.509e-02, 1.480e-02, 2.765e-02, 1.840e-03, -4.375e-02, 7.160e-03, -6.959e-02, -3.981e-02, -9.786e-02) * s0_0_0;
	r1 += M4(-1.216e-01, 2.691e-02, -4.038e-02, 3.139e-04, -2.933e-03, 5.467e-04, -8.937e-02, 3.250e-02, 3.920e-02, 1.411e-02, 9.407e-02, -3.980e-03, -4.562e-02, -9.392e-02, -8.352e-02, -1.041e-01) * s0_0_0;
	r0 += M4(5.796e-03, 4.214e-02, -9.790e-02, -1.255e-02, -6.178e-03, -1.374e-01, -9.850e-02, -1.915e-02, 3.044e-02, 9.447e-02, 1.082e-01, 9.513e-02, -4.431e-03, -7.739e-02, -7.106e-02, -7.933e-02) * s0_0_1;
	r1 += M4(-1.122e-01, 1.598e-02, 7.635e-02, -7.543e-02, 1.632e-02, 6.123e-02, -1.478e-02, 8.629e-02, 2.847e-02, -6.019e-02, -4.134e-02, -3.109e-02, -1.178e-01, 6.395e-02, -2.961e-01, 1.289e-02) * s0_0_1;
	r0 += M4(6.221e-03, 1.353e-02, 2.961e-02, 9.887e-02, 2.002e-02, -1.106e-02, -4.433e-02, 2.994e-02, 3.032e-03, 4.577e-03, 3.867e-02, 6.802e-03, -1.089e-01, -3.754e-02, -8.293e-02, -3.169e-02) * s0_0_2;
	r1 += M4(3.703e-02, 2.962e-02, 3.948e-01, 1.161e-02, -9.686e-03, 3.809e-02, -7.547e-02, 2.483e-02, 7.167e-03, 2.155e-02, 2.814e-02, 1.806e-02, -1.575e-02, -5.056e-02, -7.545e-02, -3.918e-02) * s0_0_2;
	r0 += M4(1.157e-01, 4.986e-02, -3.748e-02, 2.244e-02, 5.923e-03, 2.777e-02, -1.357e-02, -1.009e-01, -1.759e-02, -1.109e-01, 7.380e-02, 9.692e-03, -1.675e-01, -1.077e-01, -6.029e-02, -8.301e-02) * s0_1_0;
	r1 += M4(-8.647e-02, 4.538e-03, 1.138e-02, -2.912e-02, -4.588e-03, 6.212e-03, 5.055e-02, -2.192e-02, 2.240e-01, -2.629e-02, 2.379e-02, 1.270e-01, -1.704e-01, 3.963e-02, -8.947e-02, -1.180e-01) * s0_1_0;
	r0 += M4(-4.796e-01, 1.009e-01, 5.791e-02, -2.254e-01, 1.413e-01, 3.688e-01, -7.614e-02, -1.480e-01, -3.235e-01, -4.052e-01, -1.095e-01, 6.248e-02, -4.711e-02, -4.132e-01, -2.436e-01, -2.003e-01) * s0_1_1;
	r1 += M4(4.531e-02, -1.633e-01, -7.693e-02, 1.082e-01, -8.161e-02, -2.369e-01, 2.439e-01, -3.605e-01, -2.816e-01, 1.654e-01, -2.856e-02, -1.007e-01, -2.144e-01, -4.046e-01, 2.017e-01, -2.429e-01) * s0_1_1;
	r0 += M4(1.558e-01, 8.720e-02, 2.194e-01, 7.336e-01, -1.386e-01, -3.043e-02, -3.114e-02, -1.441e-01, 8.409e-02, -1.783e-02, -9.553e-02, 1.501e-01, -1.154e-01, -6.259e-02, 1.933e-01, -1.945e-01) * s0_1_2;
	r1 += M4(6.522e-01, 5.097e-01, -5.504e-02, 5.540e-01, -1.184e-01, -6.834e-02, 4.168e-02, -5.841e-02, 1.654e-01, 4.450e-02, 8.756e-02, 1.600e-01, -1.107e-01, 5.073e-02, -3.334e-02, -1.538e-01) * s0_1_2;
	r0 += M4(-1.695e-01, -2.677e-03, -2.495e-03, -1.093e-02, -1.049e-01, 1.607e-02, 3.232e-02, -2.263e-02, -1.721e-01, 1.245e-01, 5.884e-02, 6.439e-02, 2.016e-01, -3.063e-02, -7.261e-04, -9.246e-02) * s0_2_0;
	r1 += M4(1.296e-02, 3.283e-02, -5.909e-03, 1.454e-02, 1.235e-02, 3.460e-02, 1.038e-02, 7.165e-02, 3.020e-02, 2.622e-02, -8.226e-03, 7.110e-02, -5.068e-02, -5.394e-02, -1.004e-02, -5.434e-02) * s0_2_0;
	r0 += M4(2.509e-01, 2.222e-02, -3.973e-02, -3.361e-03, -2.051e-01, 6.916e-01, 3.643e-01, 1.695e-02, 3.917e-01, 9.119e-02, 1.655e-01, 2.486e-01, -3.183e-01, -1.107e-01, -7.061e-02, -2.532e-02) * s0_2_1;
	r1 += M4(-3.015e-02, -1.425e-02, 2.071e-02, -6.058e-02, 2.592e-02, 3.714e-01, 2.326e-02, 7.008e-01, 2.064e-01, 1.938e-01, 3.118e-02, 2.059e-01, -4.432e-02, 2.857e-02, -4.260e-02, -1.885e-01) * s0_2_1;
	r0 += M4(-1.407e-02, 1.508e-03, 5.534e-02, 1.504e-02, 2.471e-01, 9.104e-03, -8.336e-02, -5.899e-02, -6.813e-02, 7.008e-02, 8.359e-02, 6.517e-02, 2.200e-02, -7.490e-03, -3.672e-02, -6.154e-02) * s0_2_2;
	r1 += M4(3.107e-03, 8.990e-03, 7.450e-03, 1.151e-02, 1.479e-02, 8.181e-02, 1.458e-02, 2.521e-02, 6.094e-02, -3.388e-02, -1.403e-02, 3.266e-02, -4.658e-02, -6.913e-02, -1.006e-03, -2.432e-02) * s0_2_2;
	r0 += M4(7.323e-02, -4.259e-03, 4.753e-03, -4.161e-02, 6.497e-03, 3.262e-02, -1.934e-02, -5.799e-02, -1.996e-03, 1.240e-01, 2.797e-02, -1.520e-04, -1.911e-01, 5.141e-02, -1.777e-02, -6.794e-02) * s1_0_0;
	r1 += M4(-9.291e-04, 2.039e-02, -1.506e-01, -1.567e-02, 1.071e-02, -2.055e-02, 4.744e-02, -2.962e-02, 2.110e-02, -1.011e-02, 1.043e-01, 4.621e-03, 1.502e-01, -6.656e-02, 5.509e-02, -1.011e-02) * s1_0_0;
	r0 += M4(-4.440e-02, -3.510e-02, -1.851e-01, 3.080e-02, 1.574e-02, 8.524e-02, 6.097e-02, 5.642e-04, -3.373e-03, 6.444e-02, 8.683e-02, 2.704e-04, -3.229e-02, -2.056e-01, 1.265e-01, 1.084e-01) * s1_0_1;
	r1 += M4(9.969e-02, -1.590e-02, -1.159e-01, 5.784e-02, -9.280e-03, -8.481e-02, -6.072e-02, -1.908e-02, -5.775e-02, -3.774e-02, 1.142e-01, -8.886e-02, -1.401e-01, 2.341e-02, -5.338e-02, 8.922e-02) * s1_0_1;
	r0 += M4(1.604e-02, 1.293e-03, 1.691e-02, -1.794e-02, -4.949e-03, 6.674e-02, 5.721e-02, -9.249e-03, -2.175e-02, 2.256e-02, 2.749e-02, -3.034e-02, 9.578e-02, -5.122e-02, -1.313e-01, -1.089e-01) * s1_0_2;
	r1 += M4(8.380e-03, -1.623e-02, -1.013e-01, 1.634e-02, 1.966e-02, -3.293e-02, -2.510e-03, 4.030e-03, -7.487e-03, -2.693e-02, 6.804e-02, -2.330e-03, -3.395e-03, 1.824e-02, -1.351e-02, -3.238e-02) * s1_0_2;
	r0 += M4(1.717e-01, 6.548e-03, 1.300e-02, 1.240e-01, -9.491e-02, -1.482e-02, 3.808e-02, 3.381e-02, -3.009e-01, 3.011e-02, 6.477e-02, 4.173e-02, -4.728e-01, -9.111e-02, 3.951e-02, 2.183e-02) * s1_1_0;
	r1 += M4(1.351e-02, 6.013e-02, -7.235e-03, 4.471e-02, 1.753e-01, -9.392e-02, 5.305e-02, 5.955e-02, -2.528e-01, 5.314e-02, -1.333e-01, 1.355e-01, 1.612e-02, -7.244e-03, -4.038e-03, 5.095e-02) * s1_1_0;
	r0 += M4(4.139e-02, -6.042e-02, -1.353e-01, -4.782e-01, -4.429e-02, -8.073e-01, -1.000e-01, -2.693e-01, -1.421e-01, -1.650e-01, 8.151e-02, -1.582e-02, 4.413e-01, 2.684e-01, 2.740e-03, 1.248e-01) * s1_1_1;
	r1 += M4(-2.892e-01, -5.843e-01, -6.403e-02, -3.364e-01, -8.280e-01, 1.841e-01, -2.168e-01, -4.542e-01, 3.579e-01, 2.032e-01, -1.758e-01, 3.781e-01, -1.402e-01, 1.848e-01, -1.451e-02, -2.367e-01) * s1_1_1;
	r0 += M4(-4.375e-03, -7.608e-02, -1.205e-01, -1.324e-01, -3.449e-02, -8.453e-02, -1.539e-01, 3.298e-03, 6.522e-02, 4.261e-02, 7.434e-03, 2.289e-02, -2.651e-01, -5.673e-02, -9.700e-02, -1.260e-02) * s1_1_2;
	r1 += M4(-7.155e-02, -1.313e-01, -1.654e-02, -1.428e-01, 4.655e-02, -1.587e-02, 1.836e-02, -2.273e-03, -5.404e-02, 8.775e-02, -1.362e-01, 3.049e-02, -3.300e-02, -6.697e-01, 3.534e-02, 1.992e-02) * s1_1_2;
	r0 += M4(2.706e-02, -5.520e-02, -3.030e-02, 1.764e-02, -8.258e-02, -3.970e-02, 1.700e-02, 1.025e-02, 5.305e-02, -1.096e-01, -9.755e-02, -4.528e-02, -6.163e-01, -6.119e-02, -5.086e-02, 2.431e-02) * s1_2_0;
	r1 += M4(-3.116e-02, -2.694e-02, -2.073e-02, -6.657e-02, 5.128e-02, -1.244e-02, -3.747e-03, 3.023e-02, -4.098e-02, -1.463e-02, -8.036e-03, -1.491e-01, -2.580e-02, -2.372e-02, 1.570e-02, -8.791e-02) * s1_2_0;
	r0 += M4(-4.330e-02, -4.249e-03, -1.260e-01, -1.419e-01, -4.084e-02, 2.297e-02, -7.738e-02, -7.269e-02, 9.725e-03, 4.009e-02, -6.542e-02, -2.527e-01, -1.477e-02, -6.817e-02, -4.656e-02, -6.644e-02) * s1_2_1;
	r1 += M4(-6.601e-02, -1.822e-01, 1.398e-02, -6.148e-02, -7.354e-02, -5.687e-02, -3.139e-02, -4.762e-02, -2.507e-01, -5.293e-01, -5.276e-02, -6.373e-02, -1.556e-01, -7.885e-02, 1.157e-02, -6.529e-02) * s1_2_1;
	r0 += M4(-1.676e-01, -7.509e-04, -2.100e-02, -3.626e-02, -2.039e-02, 2.746e-02, -3.359e-03, -6.111e-02, -5.797e-02, -6.009e-02, 6.316e-02, 5.505e-03, -6.332e-02, -1.738e-02, -1.773e-01, 3.280e-02) * s1_2_2;
	r1 += M4(-9.305e-03, 3.287e-03, 4.755e-03, -3.171e-02, -3.811e-02, -1.514e-02, 3.286e-03, -2.253e-02, 4.822e-02, 7.077e-02, 7.918e-04, 4.081e-02, -3.099e-02, 1.193e-01, 2.210e-02, -5.732e-02) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(-6.004e-02, -6.185e-02, 4.395e-02, 8.812e-02, -3.723e-02, -4.481e-03, 2.394e-03, -1.993e-02, -2.487e-02, -1.315e-02, -5.285e-02, -4.823e-02, -8.029e-04, -7.113e-02, 2.422e-02, 8.961e-02) * s0_0_0;
	r1 += M4(-9.770e-02, 2.671e-02, -1.172e-01, 8.287e-02, 9.917e-03, -2.526e-02, -3.136e-02, -2.546e-02, 1.361e-02, -1.879e-02, -5.661e-02, -5.278e-02, -8.350e-02, 3.290e-02, 6.399e-02, 4.772e-02) * s0_0_0;
	r0 += M4(8.784e-03, 4.561e-02, -1.161e-03, 1.229e-01, -1.620e-02, 3.373e-03, -1.482e-02, -6.696e-02, -3.371e-02, -3.769e-02, -6.347e-02, -1.911e-01, -4.501e-02, -4.522e-02, -1.122e-01, -3.995e-02) * s0_0_1;
	r1 += M4(2.924e-01, -1.121e-02, 2.085e-01, 7.885e-02, -1.234e-01, 5.289e-03, -1.482e-01, -4.935e-02, -1.275e-01, -3.266e-02, 3.781e-02, -4.739e-02, 1.271e-01, 2.684e-02, 6.621e-01, -1.417e-02) * s0_0_1;
	r0 += M4(7.351e-02, 3.380e-02, 2.100e-02, 1.086e-03, -2.865e-02, -5.394e-03, -2.441e-02, -1.230e-02, 8.184e-02, -3.409e-02, 3.310e-02, 1.023e-01, -4.526e-02, -1.306e-02, 1.890e-02, -2.351e-02) * s0_0_2;
	r1 += M4(-6.129e-03, 5.204e-02, 1.444e-03, 5.714e-03, -6.090e-02, -2.047e-02, -7.070e-02, -2.932e-02, -1.117e-02, 1.628e-02, -7.287e-03, -9.361e-02, -2.324e-02, -5.087e-02, 2.047e-02, -1.233e-02) * s0_0_2;
	r0 += M4(2.548e-01, 1.590e-02, -1.018e-01, -5.346e-02, 9.397e-03, -6.891e-02, -2.443e-02, -9.875e-02, -2.190e-02, -8.427e-02, -2.002e-02, 8.782e-04, -1.236e-01, 2.785e-01, 3.247e-02, -1.342e-01) * s0_1_0;
	r1 += M4(-5.286e-02, 8.358e-03, 3.503e-02, -1.529e-01, 5.407e-03, -2.624e-02, -6.946e-03, -5.231e-02, -8.827e-02, -4.247e-02, 4.175e-02, -1.075e-01, 8.488e-03, 5.008e-02, 1.798e-02, 2.936e-02) * s0_1_0;
	r0 += M4(-1.458e-01, -1.948e-01, 7.764e-01, 6.265e-02, -3.243e-01, -3.518e-01, -1.445e-01, -2.794e-01, -4.258e-01, 3.404e-01, 4.372e-02, -3.009e-01, -4.965e-01, 7.746e-01, 1.870e-01, 1.977e-01) * s0_1_1;
	r1 += M4(7.451e-01, 4.365e-01, 1.784e-01, 4.495e-01, -3.090e-01, -1.442e-01, -1.039e-01, -2.291e-01, 2.904e-01, -5.022e-01, 5.362e-02, -5.491e-02, -5.563e-02, 3.350e-01, -4.758e-02, 1.572e-01) * s0_1_1;
	r0 += M4(5.567e-02, 2.210e-02, -5.635e-02, -6.733e-03, -1.091e-01, -1.615e-01, -1.086e-01, -6.289e-02, 1.786e-01, 4.148e-02, 9.399e-02, 3.963e-02, -2.265e-02, 9.031e-02, 1.238e-01, -3.518e-02) * s0_1_2;
	r1 += M4(-5.134e-02, -8.515e-02, 9.008e-04, -4.129e-03, -2.849e-01, -1.637e-02, -1.175e-01, -1.736e-01, 6.855e-03, 1.126e-01, -1.965e-02, 1.442e-02, -2.478e-02, 3.142e-02, 1.605e-02, -9.447e-03) * s0_1_2;
	r0 += M4(-2.126e-01, -3.496e-05, 7.904e-03, -1.377e-02, -2.704e-01, -1.782e-02, 3.739e-03, -3.484e-02, -2.596e-02, -1.356e-01, 4.045e-02, -7.001e-02, 2.535e-02, 1.654e-02, -2.508e-02, 5.756e-03) * s0_2_0;
	r1 += M4(-2.475e-02, -3.033e-02, 1.818e-04, 1.009e-02, -1.801e-02, -3.556e-02, 5.992e-03, 9.538e-03, 4.850e-02, 3.832e-02, -2.159e-02, 5.669e-02, 1.646e-03, 2.606e-02, 1.871e-02, -1.390e-02) * s0_2_0;
	r0 += M4(8.763e-02, 1.607e-02, 5.261e-02, 9.401e-02, -2.087e-01, -2.271e-01, -1.361e-01, -2.093e-01, -3.541e-01, 1.928e-02, -1.251e-03, -3.105e-03, 7.800e-02, -8.081e-03, 9.578e-03, -6.866e-02) * s0_2_1;
	r1 += M4(1.159e-01, 2.026e-02, 2.220e-02, 5.504e-02, -1.741e-01, -4.940e-02, -1.417e-02, -1.571e-01, 5.245e-03, 1.135e-01, 3.079e-02, 5.878e-02, -5.373e-02, -1.020e-01, 1.488e-02, 1.288e-02) * s0_2_1;
	r0 += M4(5.682e-03, -1.884e-02, 8.631e-02, 7.982e-02, -1.103e-01, -8.902e-02, -8.106e-02, -4.754e-02, 3.009e-02, 1.272e-02, -2.533e-02, -5.367e-03, 3.772e-02, -5.873e-02, 2.470e-03, 2.560e-02) * s0_2_2;
	r1 += M4(5.028e-02, 5.029e-02, 2.166e-05, 4.218e-02, -5.869e-02, -4.856e-02, -8.471e-03, -6.222e-02, 1.361e-03, -4.756e-02, 9.799e-03, -1.115e-03, 1.218e-02, 3.895e-02, 1.305e-02, -9.281e-03) * s0_2_2;
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
}

//!DESC [CuNNy_fast_SOFT] -out-shuffle
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv2
//!BIND LUMA
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 1
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Type aliases for this pass: when the float16 arithmetic extension macro is
// defined, V4/M4/F map to 16-bit vector/matrix/scalar types; otherwise they
// fall back to the regular 32-bit vec4/mat4/float.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1 fetch one texel of conv2_raw, scale it by conv2_mul and convert it to V4.
// The requested coordinate pos + (x, y) is clamped to [0, sz] first, then its x
// component is doubled; l0 reads column offset 0 and l1 reads column offset 1 of
// that doubled position. Both macros rely on `pos` and `sz` being in scope at the
// point of expansion.
#define l0(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(2, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(2, 1) + ivec2(1, 0), 0)))
// Tile cache declared with the `shared` qualifier: plane 0 holds l0 fetches,
// plane 1 holds l1 fetches, each plane is 10x10 entries.
shared V4 G[2][10][10];
// Final pass entry point: combines the 3x3 neighbourhoods of the two conv2
// texel columns into one V4 per input position and writes its four components,
// each added to a LUMA sample, as a 2x2 quad of the output image.
void hook() {
	// xy: invocation index inside the workgroup; pos: input-grid position,
	// computed with 8 positions per workgroup along each axis.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// opos: top-left corner of the 2x2 output quad written by this invocation.
	ivec2 opos = pos * ivec2(2, 2);
	// sz: largest coordinate accepted by the clamp inside l0/l1.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the shared tile. Each invocation visits offsets 0 and 8 on both axes
	// and stops once the tile index reaches 10. Entry [ay][ax] is fetched from
	// pos + (x - 1, y - 1), so the tile carries a one-texel border on every side.
	// NOTE(review): full 10x10 coverage assumes an 8x8 local size, as the pass's
	// COMPUTE directive suggests - confirm.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
		}
	}
	// Separates the tile writes above from the tile reads below.
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0;
	r0 = V4(0.0);
	// Load the 3x3 neighbourhood from each plane: sP_R_C is plane P, row R,
	// column C. Index [xy.y+1][xy.x+1] is the entry fetched at offset (0, 0)
	// from this invocation's own pos.
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Accumulate one constant 4x4 matrix times each of the 18 neighbourhood
	// vectors into r0 (presumably the layer's trained weights - the constants
	// are generated and must be kept verbatim).
	r0 += M4(-8.745e-02, 3.383e-02, 4.779e-03, 1.345e-02, -1.868e-03, 2.867e-03, -2.889e-04, 1.115e-03, 6.221e-02, 2.749e-03, -1.133e-02, -4.452e-03, -3.269e-02, -3.297e-04, 7.234e-03, -4.700e-03) * s0_0_0;
	r0 += M4(4.776e-01, -2.351e-01, -4.595e-02, -1.193e-01, -2.361e-02, -2.980e-02, 9.339e-04, 3.573e-03, 4.443e-03, -5.262e-02, 2.593e-04, -4.259e-02, 2.009e-02, 3.289e-02, -1.821e-03, 1.233e-02) * s0_0_1;
	r0 += M4(-6.475e-03, 9.050e-02, 1.046e-02, 8.310e-02, 1.110e-02, 1.366e-02, -7.064e-04, 6.121e-03, -1.315e-03, -1.964e-03, 8.686e-05, -1.607e-03, 6.700e-03, 2.105e-02, -1.192e-03, 2.453e-03) * s0_0_2;
	r0 += M4(-2.723e-02, 1.998e-02, -4.554e-02, 2.205e-02, -4.439e-02, -4.404e-03, -2.104e-02, 8.697e-03, 2.229e-01, 1.297e-02, 3.286e-01, 9.755e-03, -2.192e-01, 4.147e-03, -1.890e-01, -1.323e-03) * s0_1_0;
	r0 += M4(9.382e-03, 4.344e-02, 1.685e-01, 7.939e-02, -3.460e-01, -2.019e-01, 4.834e-01, 1.129e-01, -2.867e-02, -2.495e-01, -1.358e-02, 2.568e-01, 2.339e-01, -4.131e-01, 4.346e-01, 6.522e-03) * s0_1_1;
	r0 += M4(2.435e-04, -3.573e-03, -3.126e-03, 1.075e-02, -3.135e-03, -6.763e-02, -5.822e-03, 1.431e-01, -1.461e-04, -5.105e-03, 2.164e-03, -2.817e-02, -7.371e-03, 1.032e-01, -1.119e-03, 1.146e-01) * s0_1_2;
	r0 += M4(8.547e-04, 6.011e-04, 1.048e-05, 1.014e-02, -4.715e-04, -1.370e-03, -2.071e-02, -1.915e-03, -1.364e-02, -2.617e-03, 1.969e-02, -4.878e-03, 2.998e-03, 6.190e-03, -3.958e-02, 1.057e-02) * s0_2_0;
	r0 += M4(7.958e-04, -5.562e-04, 5.722e-03, 1.152e-02, 1.077e-02, 2.148e-03, -1.172e-01, -6.975e-02, -4.030e-03, -9.735e-03, -6.964e-03, -7.798e-02, -7.563e-03, 1.614e-02, 1.546e-02, -1.085e-01) * s0_2_1;
	r0 += M4(2.204e-04, 1.508e-04, 1.628e-05, 1.893e-03, -4.284e-04, 5.033e-03, -7.737e-03, -3.635e-02, 1.147e-04, -2.175e-03, 1.822e-03, 2.527e-04, 7.215e-04, 1.079e-02, -9.029e-04, 2.809e-02) * s0_2_2;
	r0 += M4(2.697e-03, -1.313e-03, -1.015e-03, 4.832e-04, 1.273e-02, -1.454e-02, -1.164e-02, 1.968e-04, -1.176e-05, 3.554e-06, -1.848e-07, 6.425e-06, -6.480e-02, 8.408e-03, 5.938e-03, 1.289e-04) * s1_0_0;
	r0 += M4(2.913e-02, 4.024e-02, -2.495e-03, 7.654e-03, 4.229e-03, -9.018e-02, -8.102e-03, -1.311e-02, -1.756e-03, -1.098e-03, 5.380e-06, -2.058e-05, -2.447e-01, -2.065e-01, 6.129e-03, 7.875e-03) * s1_0_1;
	r0 += M4(-7.388e-04, -6.594e-03, -3.001e-04, -2.827e-03, 1.036e-03, 3.589e-03, -1.856e-03, -3.167e-03, 3.969e-04, -1.935e-06, -3.594e-06, 2.495e-07, 4.183e-03, -4.137e-02, 1.717e-03, 8.706e-03) * s1_0_2;
	r0 += M4(4.236e-02, -7.043e-03, 2.397e-02, 6.630e-03, 7.486e-02, -2.240e-02, -6.752e-03, -4.068e-03, 4.342e-02, -4.620e-03, 8.827e-03, 3.317e-04, -8.227e-02, 1.475e-03, -7.788e-02, 6.877e-03) * s1_1_0;
	r0 += M4(-5.452e-01, 3.314e-01, -9.257e-02, 1.323e-01, 3.059e-02, 7.693e-02, 3.600e-02, -6.459e-01, 2.915e-01, 3.681e-01, 2.526e-04, 1.969e-02, 4.150e-01, 3.682e-01, -2.828e-01, -2.700e-02) * s1_1_1;
	r0 += M4(1.040e-02, -1.398e-01, -1.829e-03, -2.740e-02, 1.530e-03, 1.808e-02, 4.368e-05, 4.800e-02, -1.787e-02, 3.889e-02, 7.303e-04, -1.238e-02, -4.286e-03, 4.341e-02, 6.535e-03, -7.488e-02) * s1_1_2;
	r0 += M4(4.696e-04, -2.962e-03, 8.343e-03, 1.359e-03, -8.558e-04, -3.073e-03, -1.854e-04, -1.114e-02, -4.766e-02, 2.779e-03, -1.173e-01, -4.016e-02, 8.598e-03, 3.102e-03, 9.206e-03, 6.121e-03) * s1_2_0;
	r0 += M4(1.529e-02, -7.346e-03, -1.218e-01, 7.152e-02, 3.859e-03, 4.380e-03, 1.324e-02, 3.100e-02, -4.309e-02, -1.859e-02, -2.529e-01, 2.073e-01, -1.059e-02, 2.768e-03, 1.333e-01, 1.159e-01) * s1_2_1;
	r0 += M4(-6.572e-04, -7.312e-03, -3.383e-03, -5.428e-02, -6.678e-04, 9.883e-04, -2.315e-03, 2.015e-03, 4.007e-04, -7.839e-03, -4.416e-03, -6.325e-02, -3.776e-05, -4.929e-03, 6.210e-03, 3.366e-02) * s1_2_2;
	// Constant per-component offset added after the accumulation.
	r0 += V4(-1.011e-09, -5.997e-09, 3.794e-09, 4.918e-10);
	// opt is half of LUMA_pt and fpos is the centre of output pixel opos scaled
	// by opt. NOTE(review): presumably LUMA_pt is the normalized size of one LUMA
	// texel, making opt one output-pixel step - confirm against the host's
	// texture macro definitions.
	vec2 opt = 0.5 * LUMA_pt;
	vec2 fpos = (vec2(opos) + vec2(0.5)) * opt;
	// Write the 2x2 quad: r0.x -> (0,0), r0.y -> (1,0), r0.z -> (0,1), r0.w -> (1,1).
	// Each component is added to the LUMA sample taken at the matching offset;
	// only the red channel carries data (green/blue are 0.0, alpha is 1.0).
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0.x + LUMA_tex(fpos + vec2(0.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r0.y + LUMA_tex(fpos + vec2(1.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r0.z + LUMA_tex(fpos + vec2(0.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r0.w + LUMA_tex(fpos + vec2(1.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
}
